# title: "QC_Justin_Seurat"
# output: html_document
# date: "2023-06-28"

# changed sperm filter to more stringent, < 3 to == 0

library(SingleCellExperiment)
## Loading required package: SummarizedExperiment
## Loading required package: MatrixGenerics
## Loading required package: matrixStats
## 
## Attaching package: 'MatrixGenerics'
## The following objects are masked from 'package:matrixStats':
## 
##     colAlls, colAnyNAs, colAnys, colAvgsPerRowSet, colCollapse,
##     colCounts, colCummaxs, colCummins, colCumprods, colCumsums,
##     colDiffs, colIQRDiffs, colIQRs, colLogSumExps, colMadDiffs,
##     colMads, colMaxs, colMeans2, colMedians, colMins, colOrderStats,
##     colProds, colQuantiles, colRanges, colRanks, colSdDiffs, colSds,
##     colSums2, colTabulates, colVarDiffs, colVars, colWeightedMads,
##     colWeightedMeans, colWeightedMedians, colWeightedSds,
##     colWeightedVars, rowAlls, rowAnyNAs, rowAnys, rowAvgsPerColSet,
##     rowCollapse, rowCounts, rowCummaxs, rowCummins, rowCumprods,
##     rowCumsums, rowDiffs, rowIQRDiffs, rowIQRs, rowLogSumExps,
##     rowMadDiffs, rowMads, rowMaxs, rowMeans2, rowMedians, rowMins,
##     rowOrderStats, rowProds, rowQuantiles, rowRanges, rowRanks,
##     rowSdDiffs, rowSds, rowSums2, rowTabulates, rowVarDiffs, rowVars,
##     rowWeightedMads, rowWeightedMeans, rowWeightedMedians,
##     rowWeightedSds, rowWeightedVars
## Loading required package: GenomicRanges
## Loading required package: stats4
## Loading required package: BiocGenerics
## 
## Attaching package: 'BiocGenerics'
## The following objects are masked from 'package:stats':
## 
##     IQR, mad, sd, var, xtabs
## The following objects are masked from 'package:base':
## 
##     anyDuplicated, append, as.data.frame, basename, cbind, colnames,
##     dirname, do.call, duplicated, eval, evalq, Filter, Find, get, grep,
##     grepl, intersect, is.unsorted, lapply, Map, mapply, match, mget,
##     order, paste, pmax, pmax.int, pmin, pmin.int, Position, rank,
##     rbind, Reduce, rownames, sapply, setdiff, sort, table, tapply,
##     union, unique, unsplit, which.max, which.min
## Loading required package: S4Vectors
## 
## Attaching package: 'S4Vectors'
## The following objects are masked from 'package:base':
## 
##     expand.grid, I, unname
## Loading required package: IRanges
## Loading required package: GenomeInfoDb
## Loading required package: Biobase
## Welcome to Bioconductor
## 
##     Vignettes contain introductory material; view with
##     'browseVignettes()'. To cite Bioconductor, see
##     'citation("Biobase")', and for packages 'citation("pkgname")'.
## 
## Attaching package: 'Biobase'
## The following object is masked from 'package:MatrixGenerics':
## 
##     rowMedians
## The following objects are masked from 'package:matrixStats':
## 
##     anyMissing, rowMedians
library(Seurat)
## The legacy packages maptools, rgdal, and rgeos, underpinning this package
## will retire shortly. Please refer to R-spatial evolution reports on
## https://r-spatial.org/r/2023/05/15/evolution4.html for details.
## This package is now running under evolution status 0
## Attaching SeuratObject
## 
## Attaching package: 'Seurat'
## The following object is masked from 'package:SummarizedExperiment':
## 
##     Assays
library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr     1.1.2     ✔ readr     2.1.4
## ✔ forcats   1.0.0     ✔ stringr   1.5.0
## ✔ ggplot2   3.4.2     ✔ tibble    3.2.1
## ✔ lubridate 1.9.2     ✔ tidyr     1.3.0
## ✔ purrr     1.0.1
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ lubridate::%within%() masks IRanges::%within%()
## ✖ dplyr::collapse()     masks IRanges::collapse()
## ✖ dplyr::combine()      masks Biobase::combine(), BiocGenerics::combine()
## ✖ dplyr::count()        masks matrixStats::count()
## ✖ dplyr::desc()         masks IRanges::desc()
## ✖ tidyr::expand()       masks S4Vectors::expand()
## ✖ dplyr::filter()       masks stats::filter()
## ✖ dplyr::first()        masks S4Vectors::first()
## ✖ dplyr::lag()          masks stats::lag()
## ✖ ggplot2::Position()   masks BiocGenerics::Position(), base::Position()
## ✖ purrr::reduce()       masks GenomicRanges::reduce(), IRanges::reduce()
## ✖ dplyr::rename()       masks S4Vectors::rename()
## ✖ lubridate::second()   masks S4Vectors::second()
## ✖ lubridate::second<-() masks S4Vectors::second<-()
## ✖ dplyr::slice()        masks IRanges::slice()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(Matrix)
## 
## Attaching package: 'Matrix'
## 
## The following objects are masked from 'package:tidyr':
## 
##     expand, pack, unpack
## 
## The following object is masked from 'package:S4Vectors':
## 
##     expand
library(scales)
## 
## Attaching package: 'scales'
## 
## The following object is masked from 'package:purrr':
## 
##     discard
## 
## The following object is masked from 'package:readr':
## 
##     col_factor
library(cowplot)
## 
## Attaching package: 'cowplot'
## 
## The following object is masked from 'package:lubridate':
## 
##     stamp
library(RCurl)
## 
## Attaching package: 'RCurl'
## 
## The following object is masked from 'package:tidyr':
## 
##     complete
setwd("~/OneDrive - University of Florida/Schnitzler_Lab/Jingwei/Whitney/Manuscript/10X_/Justin2019")

# Create a Seurat object for each sample
for (file in c("3XPBS_filtered_feature_bc_matrix", "Seawater_filtered_feature_bc_matrix"
               )){
  seurat_data <- Read10X(data.dir = paste0("data/", file))
  seurat_obj <- CreateSeuratObject(counts = seurat_data, 
                                   min.features = 100, 
                                   project = file)
  assign(file, seurat_obj)
}


# Merge samples into a single Seurat object
merged_seurat <- merge(x=`3XPBS_filtered_feature_bc_matrix`,
                       y=Seawater_filtered_feature_bc_matrix,
                       add.cell.id = c("3XPBS", "Seawater"))

# Check that the merged object has the appropriate sample-specific prefixes
head(merged_seurat@meta.data)
##                                                orig.ident nCount_RNA
## 3XPBS_AAACCCAAGACATCAA-1 3XPBS_filtered_feature_bc_matrix       2673
## 3XPBS_AAACCCAAGGGTAATT-1 3XPBS_filtered_feature_bc_matrix       1366
## 3XPBS_AAACCCAAGGTGCGAT-1 3XPBS_filtered_feature_bc_matrix       6805
## 3XPBS_AAACCCACAGCTGTAT-1 3XPBS_filtered_feature_bc_matrix       2268
## 3XPBS_AAACCCACATCGGCCA-1 3XPBS_filtered_feature_bc_matrix       4069
## 3XPBS_AAACCCACATGAATCC-1 3XPBS_filtered_feature_bc_matrix       3287
##                          nFeature_RNA
## 3XPBS_AAACCCAAGACATCAA-1          710
## 3XPBS_AAACCCAAGGGTAATT-1          397
## 3XPBS_AAACCCAAGGTGCGAT-1         1460
## 3XPBS_AAACCCACAGCTGTAT-1         1147
## 3XPBS_AAACCCACATCGGCCA-1          755
## 3XPBS_AAACCCACATGAATCC-1         1037
tail(merged_seurat@meta.data)
##                                                      orig.ident nCount_RNA
## Seawater_TTTGGTTAGTTTCGGT-1 Seawater_filtered_feature_bc_matrix        598
## Seawater_TTTGGTTGTACTGACT-1 Seawater_filtered_feature_bc_matrix       2672
## Seawater_TTTGTTGAGTCCGTCG-1 Seawater_filtered_feature_bc_matrix       2268
## Seawater_TTTGTTGAGTGCTACT-1 Seawater_filtered_feature_bc_matrix       3112
## Seawater_TTTGTTGGTCAACACT-1 Seawater_filtered_feature_bc_matrix        660
## Seawater_TTTGTTGGTCCAAGAG-1 Seawater_filtered_feature_bc_matrix       4405
##                             nFeature_RNA
## Seawater_TTTGGTTAGTTTCGGT-1          432
## Seawater_TTTGGTTGTACTGACT-1         1466
## Seawater_TTTGTTGAGTCCGTCG-1          578
## Seawater_TTTGTTGAGTGCTACT-1         1224
## Seawater_TTTGTTGGTCAACACT-1          270
## Seawater_TTTGTTGGTCCAAGAG-1         1886
### QC ###

# Add number of genes per UMI for each cell to metadata
merged_seurat$log10GenesPerUMI <- log10(merged_seurat$nFeature_RNA) / log10(merged_seurat$nCount_RNA)

# Compute percent mitochondrial gene ratio
merged_seurat$mitoRatio <- PercentageFeatureSet(object = merged_seurat, pattern = "^HyS0613.")
merged_seurat$mitoRatio <- merged_seurat@meta.data$mitoRatio / 100

# Create metadata data frame
metadata <- merged_seurat@meta.data

# Add cell IDs to metadata
metadata$cells <- rownames(metadata)

# Create sample column. this will be useful later when merging with other batches
metadata$sample <- rep("Live_cells", dim(merged_seurat)[2])

# Rename columns, "new.name = old.name"
metadata <- metadata %>%
  dplyr::rename(seq_folder = orig.ident,
                nUMI = nCount_RNA,
                nGene = nFeature_RNA)

# Add metadata back to Seurat object
merged_seurat@meta.data <- metadata

# check
tail(merged_seurat@meta.data)
##                                                      seq_folder nUMI nGene
## Seawater_TTTGGTTAGTTTCGGT-1 Seawater_filtered_feature_bc_matrix  598   432
## Seawater_TTTGGTTGTACTGACT-1 Seawater_filtered_feature_bc_matrix 2672  1466
## Seawater_TTTGTTGAGTCCGTCG-1 Seawater_filtered_feature_bc_matrix 2268   578
## Seawater_TTTGTTGAGTGCTACT-1 Seawater_filtered_feature_bc_matrix 3112  1224
## Seawater_TTTGTTGGTCAACACT-1 Seawater_filtered_feature_bc_matrix  660   270
## Seawater_TTTGTTGGTCCAAGAG-1 Seawater_filtered_feature_bc_matrix 4405  1886
##                             log10GenesPerUMI    mitoRatio
## Seawater_TTTGGTTAGTTTCGGT-1        0.9491420 0.0000000000
## Seawater_TTTGGTTGTACTGACT-1        0.9239233 0.0000000000
## Seawater_TTTGTTGAGTCCGTCG-1        0.8230696 0.0008818342
## Seawater_TTTGTTGAGTGCTACT-1        0.8839812 0.0009640103
## Seawater_TTTGTTGGTCAACACT-1        0.8623252 0.0000000000
## Seawater_TTTGTTGGTCCAAGAG-1        0.8988996 0.0002270148
##                                                   cells     sample
## Seawater_TTTGGTTAGTTTCGGT-1 Seawater_TTTGGTTAGTTTCGGT-1 Live_cells
## Seawater_TTTGGTTGTACTGACT-1 Seawater_TTTGGTTGTACTGACT-1 Live_cells
## Seawater_TTTGTTGAGTCCGTCG-1 Seawater_TTTGTTGAGTCCGTCG-1 Live_cells
## Seawater_TTTGTTGAGTGCTACT-1 Seawater_TTTGTTGAGTGCTACT-1 Live_cells
## Seawater_TTTGTTGGTCAACACT-1 Seawater_TTTGTTGGTCAACACT-1 Live_cells
## Seawater_TTTGTTGGTCCAAGAG-1 Seawater_TTTGTTGGTCCAAGAG-1 Live_cells
# OPTIONAL: 
#save(merged_seurat, file="data/merged_seurat.RData")

# Visualize the number of cell counts per sample
merged_seurat@meta.data %>% 
  ggplot(aes(x=sample, fill=sample)) + 
  geom_bar() +
  theme_classic() +
  theme(axis.text.x = element_text(angle = 45, vjust = 1, hjust=1)) +
  theme(plot.title = element_text(hjust=0.5, face="bold")) +
  ggtitle("NCells")

# Visualize the number UMIs/transcripts per cell. if UMI is between 500-1000, cells should probably
# be sequenced more deeply. 
merged_seurat@meta.data %>% 
  ggplot(aes(color=sample, x=nUMI, fill= sample)) + 
  geom_density(alpha = 0.2) + 
  scale_x_log10() + 
  theme_classic() +
  ylab("Cell density") +
  geom_vline(xintercept = 300)

# Visualize the distribution of genes detected per cell via histogram
merged_seurat@meta.data %>% 
  ggplot(aes(color=sample, x=nGene, fill= sample)) + 
  geom_density(alpha = 0.2) + 
  theme_classic() +
  scale_x_log10() + 
  geom_vline(xintercept = 7000)

Two peaks. suggesting doublet problem. The first peak is mostly sperm

# Visualize the overall complexity of the gene expression by visualizing the genes detected per UMI (novelty score)
merged_seurat@meta.data %>%
  ggplot(aes(x=log10GenesPerUMI, color = sample, fill=sample)) +
  geom_density(alpha = 0.2) +
  theme_classic() +
  geom_vline(xintercept = 0.8)

# Visualize the distribution of mitochondrial gene expression detected per cell
merged_seurat@meta.data %>% 
  ggplot(aes(color=sample, x=mitoRatio, fill=sample)) + 
  geom_density(alpha = 0.2) + 
  scale_x_log10() + 
  theme_classic() +
  geom_vline(xintercept = 0.2)
## Warning: Transformation introduced infinite values in continuous x-axis
## Warning: Removed 4816 rows containing non-finite values (`stat_density()`).

# Visualize the correlation between genes detected and number of UMIs and determine whether strong presence of cells with low numbers of genes/UMIs
merged_seurat@meta.data %>% 
  ggplot(aes(x=nUMI, y=nGene, color=mitoRatio)) + 
  geom_point() + 
  scale_colour_gradient(low = "gray90", high = "black") +
  stat_smooth(method=lm) +
  scale_x_log10() + 
  scale_y_log10() + 
  theme_classic() +
  geom_vline(xintercept = 500) +
  geom_hline(yintercept = 250) +
  facet_wrap(~sample)
## `geom_smooth()` using formula = 'y ~ x'
## Warning: The following aesthetics were dropped during statistical transformation: colour
## ℹ This can happen when ggplot fails to infer the correct grouping structure in
##   the data.
## ℹ Did you forget to specify a `group` aesthetic or to convert a numerical
##   variable into a factor?

# Filter out low quality cells using selected thresholds - these will change with experiment
filtered_seurat <- subset(x = merged_seurat, 
                          subset= (nUMI >= 300 & nUMI <=100000) & 
                            (nGene >= 100 & nGene <7000) & 
                            (log10GenesPerUMI > 0.80) & # could change
                            (mitoRatio < 0.20))

filtered_seurat # 9655 cells
## An object of class Seurat 
## 22034 features across 9655 samples within 1 assay 
## Active assay: RNA (22034 features, 0 variable features)
# Gene level filtering
# Extract counts
counts <- GetAssayData(object = filtered_seurat, slot = "counts")

# Output a logical matrix specifying for each gene on whether or not there are more than zero counts per cell
nonzero <- counts > 0

# Sums all TRUE values and returns TRUE if more than 10 TRUE values per gene
keep_genes <- Matrix::rowSums(nonzero) >= 10

# Only keeping those genes expressed in more than 10 cells
filtered_counts <- counts[keep_genes, ]

# Reassign to filtered Seurat object
filtered_seurat <- CreateSeuratObject(filtered_counts, meta.data = filtered_seurat@meta.data)

filtered_seurat # 9655, did not remove any genes
## An object of class Seurat 
## 15068 features across 9655 samples within 1 assay 
## Active assay: RNA (15068 features, 0 variable features)
# OPTIONAL: 
#save(filtered_seurat, file="output/filtered_seurat.RData")
# SCtransform, PCA, UMAP, Clus

filtered_seurat <- SCTransform(filtered_seurat, vst.flavor = "v2") %>%
  RunPCA(npcs = 50) %>%
  RunUMAP(dims = 1:15) %>%
  FindNeighbors(reduction = "pca", dims = 1:15) %>%
  FindClusters(resolution = 0.5)  
## 
  |                                                                            
  |                                                                      |   0%
## Warning: useNames = NA is deprecated. Instead, specify either useNames = TRUE
## or useNames = TRUE.

## Warning: useNames = NA is deprecated. Instead, specify either useNames = TRUE
## or useNames = TRUE.

## Warning: useNames = NA is deprecated. Instead, specify either useNames = TRUE
## or useNames = TRUE.

## Warning: useNames = NA is deprecated. Instead, specify either useNames = TRUE
## or useNames = TRUE.

## Warning: useNames = NA is deprecated. Instead, specify either useNames = TRUE
## or useNames = TRUE.

## Warning: useNames = NA is deprecated. Instead, specify either useNames = TRUE
## or useNames = TRUE.
## 
  |                                                                            
  |==================                                                    |  25%
## Warning: useNames = NA is deprecated. Instead, specify either useNames = TRUE
## or useNames = TRUE.

## Warning: useNames = NA is deprecated. Instead, specify either useNames = TRUE
## or useNames = TRUE.

## Warning: useNames = NA is deprecated. Instead, specify either useNames = TRUE
## or useNames = TRUE.

## Warning: useNames = NA is deprecated. Instead, specify either useNames = TRUE
## or useNames = TRUE.

## Warning: useNames = NA is deprecated. Instead, specify either useNames = TRUE
## or useNames = TRUE.

## Warning: useNames = NA is deprecated. Instead, specify either useNames = TRUE
## or useNames = TRUE.
## 
  |                                                                            
  |===================================                                   |  50%
## Warning: useNames = NA is deprecated. Instead, specify either useNames = TRUE
## or useNames = TRUE.

## Warning: useNames = NA is deprecated. Instead, specify either useNames = TRUE
## or useNames = TRUE.

## Warning: useNames = NA is deprecated. Instead, specify either useNames = TRUE
## or useNames = TRUE.

## Warning: useNames = NA is deprecated. Instead, specify either useNames = TRUE
## or useNames = TRUE.

## Warning: useNames = NA is deprecated. Instead, specify either useNames = TRUE
## or useNames = TRUE.

## Warning: useNames = NA is deprecated. Instead, specify either useNames = TRUE
## or useNames = TRUE.
## 
  |                                                                            
  |====================================================                  |  75%
## Warning: useNames = NA is deprecated. Instead, specify either useNames = TRUE
## or useNames = TRUE.

## Warning: useNames = NA is deprecated. Instead, specify either useNames = TRUE
## or useNames = TRUE.

## Warning: useNames = NA is deprecated. Instead, specify either useNames = TRUE
## or useNames = TRUE.

## Warning: useNames = NA is deprecated. Instead, specify either useNames = TRUE
## or useNames = TRUE.

## Warning: useNames = NA is deprecated. Instead, specify either useNames = TRUE
## or useNames = TRUE.

## Warning: useNames = NA is deprecated. Instead, specify either useNames = TRUE
## or useNames = TRUE.
## 
  |                                                                            
  |======================================================================| 100%
## 
  |                                                                            
  |                                                                      |   0%
  |                                                                            
  |==                                                                    |   3%
  |                                                                            
  |=====                                                                 |   6%
  |                                                                            
  |=======                                                               |  10%
  |                                                                            
  |=========                                                             |  13%
  |                                                                            
  |===========                                                           |  16%
  |                                                                            
  |==============                                                        |  19%
  |                                                                            
  |================                                                      |  23%
  |                                                                            
  |==================                                                    |  26%
  |                                                                            
  |====================                                                  |  29%
  |                                                                            
  |=======================                                               |  32%
  |                                                                            
  |=========================                                             |  35%
  |                                                                            
  |===========================                                           |  39%
  |                                                                            
  |=============================                                         |  42%
  |                                                                            
  |================================                                      |  45%
  |                                                                            
  |==================================                                    |  48%
  |                                                                            
  |====================================                                  |  52%
  |                                                                            
  |======================================                                |  55%
  |                                                                            
  |=========================================                             |  58%
  |                                                                            
  |===========================================                           |  61%
  |                                                                            
  |=============================================                         |  65%
  |                                                                            
  |===============================================                       |  68%
  |                                                                            
  |==================================================                    |  71%
  |                                                                            
  |====================================================                  |  74%
  |                                                                            
  |======================================================                |  77%
  |                                                                            
  |========================================================              |  81%
  |                                                                            
  |===========================================================           |  84%
  |                                                                            
  |=============================================================         |  87%
  |                                                                            
  |===============================================================       |  90%
  |                                                                            
  |=================================================================     |  94%
  |                                                                            
  |====================================================================  |  97%
  |                                                                            
  |======================================================================| 100%
## 
  |                                                                            
  |                                                                      |   0%
  |                                                                            
  |==                                                                    |   3%
  |                                                                            
  |=====                                                                 |   6%
  |                                                                            
  |=======                                                               |  10%
  |                                                                            
  |=========                                                             |  13%
  |                                                                            
  |===========                                                           |  16%
  |                                                                            
  |==============                                                        |  19%
  |                                                                            
  |================                                                      |  23%
  |                                                                            
  |==================                                                    |  26%
  |                                                                            
  |====================                                                  |  29%
  |                                                                            
  |=======================                                               |  32%
  |                                                                            
  |=========================                                             |  35%
  |                                                                            
  |===========================                                           |  39%
  |                                                                            
  |=============================                                         |  42%
  |                                                                            
  |================================                                      |  45%
  |                                                                            
  |==================================                                    |  48%
  |                                                                            
  |====================================                                  |  52%
  |                                                                            
  |======================================                                |  55%
  |                                                                            
  |=========================================                             |  58%
  |                                                                            
  |===========================================                           |  61%
  |                                                                            
  |=============================================                         |  65%
  |                                                                            
  |===============================================                       |  68%
  |                                                                            
  |==================================================                    |  71%
  |                                                                            
  |====================================================                  |  74%
  |                                                                            
  |======================================================                |  77%
  |                                                                            
  |========================================================              |  81%
  |                                                                            
  |===========================================================           |  84%
  |                                                                            
  |=============================================================         |  87%
  |                                                                            
  |===============================================================       |  90%
  |                                                                            
  |=================================================================     |  94%
  |                                                                            
  |====================================================================  |  97%
  |                                                                            
  |======================================================================| 100%
## Warning: The default method for RunUMAP has changed from calling Python UMAP via reticulate to the R-native UWOT using the cosine metric
## To use Python UMAP via reticulate, set umap.method to 'umap-learn' and metric to 'correlation'
## This message will be shown once per session
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
## 
## Number of nodes: 9655
## Number of edges: 273732
## 
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.9504
## Number of communities: 21
## Elapsed time: 0 seconds
# UMAP
DimPlot(filtered_seurat,label = T) + NoLegend() + NoAxes()

# check a few known marker genes, using sctransform normalized data
FeaturePlot(filtered_seurat, features = c("HyS0061.60", "HyS0050.7", #PCNA, #piwi
                                          "HyS0008.263", "HyS0030.203", #Ncol1, Nematocilin
                                          "HyS0041.99", "HyS0004.446", #chitinase,mucs
                                          "HyS0085.53", "HyS0013.338", #ELAV, Rfamide
                                          "HyS0017.253", "HyS0010.323"), #Wnt5, Pitx
      
            order = T, min.cutoff = 'q10')
## Warning in FeaturePlot(filtered_seurat, features = c("HyS0061.60", "HyS0050.7",
## : All cells have the same value (0.693147180559945) of HyS0010.323.

FeaturePlot(filtered_seurat, features = c("HyS0027.170","HyS0001.110")) # early, late sperm

#save(filtered_seurat, file="output/Filtered_seurat_PC50_dim15_res.0.5.RData")

# remove sperm clusters

Iwant <- subset(filtered_seurat, idents = c("8","0", "7","2","1","3","12","6"), invert = T)
DimPlot(Iwant)

# now remove cells with sperm marker expression

# HyS0027.170
# HyS0070.46
# HyS4524.1
# HyS0007.253
# HyS0001.110


Iwant <- subset(Iwant, subset = HyS0027.170 > 0, invert = T )
Iwant <- subset(Iwant, subset = HyS0070.46 > 0, invert = T )
Iwant <- subset(Iwant, subset = HyS4524.1 > 0, invert = T )
Iwant <- subset(Iwant, subset = HyS0007.253 > 0, invert = T )
Iwant <- subset(Iwant, subset = HyS0001.110 > 0, invert = T )


Iwant # 4023
## An object of class Seurat 
## 30136 features across 4023 samples within 2 assays 
## Active assay: SCT (15068 features, 3000 variable features)
##  1 other assay present: RNA
##  2 dimensional reductions calculated: pca, umap
DimPlot(Iwant) # cleaner

# Visualize the distribution of genes detected per cell via histogram
Iwant@meta.data %>% 
  ggplot(aes(color=sample, x=nGene, fill= sample)) + 
  geom_density(alpha = 0.2) + 
  theme_classic() +
  scale_x_log10() + 
  geom_vline(xintercept = 7000)

# sperm problem mitigated, still not perfect.

DimPlot(Iwant, label = T)

VlnPlot(Iwant, "HyS0041.99", assay = "SCT", slot = "data") # very few cell in C16 (zymo GC)

# compare to nematocilin below

VlnPlot(Iwant, "HyS0008.263", assay = "SCT", slot = "data")

#HyS0050.7
VlnPlot(Iwant, "HyS0050.7", assay = "SCT", slot = "data")

aggr_fltd <- Iwant
aggr_fltd <- SCTransform(aggr_fltd, vst.flavor = "v2") %>%
  RunPCA(npcs = 50) %>%
  RunUMAP(dims = 1:15) %>%
  FindNeighbors(reduction = "pca", dims = 1:15) %>%
  FindClusters(resolution = 0.5)  
## 
  |                                                                            
  |                                                                      |   0%
## Warning: useNames = NA is deprecated. Instead, specify either useNames = TRUE
## or useNames = TRUE.

## Warning: useNames = NA is deprecated. Instead, specify either useNames = TRUE
## or useNames = TRUE.

## Warning: useNames = NA is deprecated. Instead, specify either useNames = TRUE
## or useNames = TRUE.

## Warning: useNames = NA is deprecated. Instead, specify either useNames = TRUE
## or useNames = TRUE.

## Warning: useNames = NA is deprecated. Instead, specify either useNames = TRUE
## or useNames = TRUE.

## Warning: useNames = NA is deprecated. Instead, specify either useNames = TRUE
## or useNames = TRUE.
## 
  |                                                                            
  |==================                                                    |  25%
## Warning: useNames = NA is deprecated. Instead, specify either useNames = TRUE
## or useNames = TRUE.

## Warning: useNames = NA is deprecated. Instead, specify either useNames = TRUE
## or useNames = TRUE.

## Warning: useNames = NA is deprecated. Instead, specify either useNames = TRUE
## or useNames = TRUE.

## Warning: useNames = NA is deprecated. Instead, specify either useNames = TRUE
## or useNames = TRUE.

## Warning: useNames = NA is deprecated. Instead, specify either useNames = TRUE
## or useNames = TRUE.

## Warning: useNames = NA is deprecated. Instead, specify either useNames = TRUE
## or useNames = TRUE.
## 
  |                                                                            
  |===================================                                   |  50%
## Warning: useNames = NA is deprecated. Instead, specify either useNames = TRUE
## or useNames = TRUE.

## Warning: useNames = NA is deprecated. Instead, specify either useNames = TRUE
## or useNames = TRUE.

## Warning: useNames = NA is deprecated. Instead, specify either useNames = TRUE
## or useNames = TRUE.

## Warning: useNames = NA is deprecated. Instead, specify either useNames = TRUE
## or useNames = TRUE.

## Warning: useNames = NA is deprecated. Instead, specify either useNames = TRUE
## or useNames = TRUE.

## Warning: useNames = NA is deprecated. Instead, specify either useNames = TRUE
## or useNames = TRUE.
## 
  |                                                                            
  |====================================================                  |  75%
## Warning: useNames = NA is deprecated. Instead, specify either useNames = TRUE
## or useNames = TRUE.

## Warning: useNames = NA is deprecated. Instead, specify either useNames = TRUE
## or useNames = TRUE.

## Warning: useNames = NA is deprecated. Instead, specify either useNames = TRUE
## or useNames = TRUE.

## Warning: useNames = NA is deprecated. Instead, specify either useNames = TRUE
## or useNames = TRUE.

## Warning: useNames = NA is deprecated. Instead, specify either useNames = TRUE
## or useNames = TRUE.

## Warning: useNames = NA is deprecated. Instead, specify either useNames = TRUE
## or useNames = TRUE.
## 
  |                                                                            
  |======================================================================| 100%
## 
  |                                                                            
  |                                                                      |   0%
  |                                                                            
  |==                                                                    |   3%
  |                                                                            
  |=====                                                                 |   7%
  |                                                                            
  |=======                                                               |  10%
  |                                                                            
  |=========                                                             |  13%
  |                                                                            
  |============                                                          |  17%
  |                                                                            
  |==============                                                        |  20%
  |                                                                            
  |================                                                      |  23%
  |                                                                            
  |===================                                                   |  27%
  |                                                                            
  |=====================                                                 |  30%
  |                                                                            
  |=======================                                               |  33%
  |                                                                            
  |==========================                                            |  37%
  |                                                                            
  |============================                                          |  40%
  |                                                                            
  |==============================                                        |  43%
  |                                                                            
  |=================================                                     |  47%
  |                                                                            
  |===================================                                   |  50%
  |                                                                            
  |=====================================                                 |  53%
  |                                                                            
  |========================================                              |  57%
  |                                                                            
  |==========================================                            |  60%
  |                                                                            
  |============================================                          |  63%
  |                                                                            
  |===============================================                       |  67%
  |                                                                            
  |=================================================                     |  70%
  |                                                                            
  |===================================================                   |  73%
  |                                                                            
  |======================================================                |  77%
  |                                                                            
  |========================================================              |  80%
  |                                                                            
  |==========================================================            |  83%
  |                                                                            
  |=============================================================         |  87%
  |                                                                            
  |===============================================================       |  90%
  |                                                                            
  |=================================================================     |  93%
  |                                                                            
  |====================================================================  |  97%
  |                                                                            
  |======================================================================| 100%
## 
  |                                                                            
  |                                                                      |   0%
  |                                                                            
  |==                                                                    |   3%
  |                                                                            
  |=====                                                                 |   7%
  |                                                                            
  |=======                                                               |  10%
  |                                                                            
  |=========                                                             |  13%
  |                                                                            
  |============                                                          |  17%
  |                                                                            
  |==============                                                        |  20%
  |                                                                            
  |================                                                      |  23%
  |                                                                            
  |===================                                                   |  27%
  |                                                                            
  |=====================                                                 |  30%
  |                                                                            
  |=======================                                               |  33%
  |                                                                            
  |==========================                                            |  37%
  |                                                                            
  |============================                                          |  40%
  |                                                                            
  |==============================                                        |  43%
  |                                                                            
  |=================================                                     |  47%
  |                                                                            
  |===================================                                   |  50%
  |                                                                            
  |=====================================                                 |  53%
  |                                                                            
  |========================================                              |  57%
  |                                                                            
  |==========================================                            |  60%
  |                                                                            
  |============================================                          |  63%
  |                                                                            
  |===============================================                       |  67%
  |                                                                            
  |=================================================                     |  70%
  |                                                                            
  |===================================================                   |  73%
  |                                                                            
  |======================================================                |  77%
  |                                                                            
  |========================================================              |  80%
  |                                                                            
  |==========================================================            |  83%
  |                                                                            
  |=============================================================         |  87%
  |                                                                            
  |===============================================================       |  90%
  |                                                                            
  |=================================================================     |  93%
  |                                                                            
  |====================================================================  |  97%
  |                                                                            
  |======================================================================| 100%
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
## 
## Number of nodes: 4023
## Number of edges: 109622
## 
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.9387
## Number of communities: 16
## Elapsed time: 0 seconds
DimPlot(aggr_fltd,label = T) + NoLegend() + NoAxes()

FeaturePlot(aggr_fltd, features = c("HyS0061.60", "HyS0050.7", #PCNA, #piwi
                                          "HyS0008.263", "HyS0030.203", #Ncol1, Nematocilin
                                          "HyS0041.99", "HyS0004.446", #chitinase,mucs
                                          "HyS0085.53", "HyS0013.338", #ELAV, Rfamide
                                          "HyS0017.253", "HyS0010.323"), #Wnt5, Pitx
            
            order = T, min.cutoff = 'q10')

getwd()
## [1] "/Users/jsong/OneDrive - University of Florida/Schnitzler_Lab/Jingwei/Whitney/Manuscript/10X_/Justin2019"
# save(aggr_fltd, file="aggr_fltd_PC50_dim15_res.0.5_v2.RData")

# sessionInfo
sessionInfo()
## R version 4.2.1 (2022-06-23)
## Platform: x86_64-apple-darwin17.0 (64-bit)
## Running under: macOS Big Sur ... 10.16
## 
## Matrix products: default
## BLAS:   /Library/Frameworks/R.framework/Versions/4.2/Resources/lib/libRblas.0.dylib
## LAPACK: /Library/Frameworks/R.framework/Versions/4.2/Resources/lib/libRlapack.dylib
## 
## locale:
## [1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8
## 
## attached base packages:
## [1] stats4    stats     graphics  grDevices utils     datasets  methods  
## [8] base     
## 
## other attached packages:
##  [1] RCurl_1.98-1.12             cowplot_1.1.1              
##  [3] scales_1.2.1                Matrix_1.5-4.1             
##  [5] lubridate_1.9.2             forcats_1.0.0              
##  [7] stringr_1.5.0               dplyr_1.1.2                
##  [9] purrr_1.0.1                 readr_2.1.4                
## [11] tidyr_1.3.0                 tibble_3.2.1               
## [13] ggplot2_3.4.2               tidyverse_2.0.0            
## [15] SeuratObject_4.1.3          Seurat_4.3.0               
## [17] SingleCellExperiment_1.18.1 SummarizedExperiment_1.26.1
## [19] Biobase_2.56.0              GenomicRanges_1.48.0       
## [21] GenomeInfoDb_1.32.4         IRanges_2.30.1             
## [23] S4Vectors_0.34.0            BiocGenerics_0.42.0        
## [25] MatrixGenerics_1.8.1        matrixStats_1.0.0          
## 
## loaded via a namespace (and not attached):
##   [1] plyr_1.8.8                igraph_1.4.3             
##   [3] lazyeval_0.2.2            sp_1.6-1                 
##   [5] splines_4.2.1             listenv_0.9.0            
##   [7] scattermore_1.1           digest_0.6.31            
##   [9] htmltools_0.5.5           fansi_1.0.4              
##  [11] magrittr_2.0.3            tensor_1.5               
##  [13] cluster_2.1.4             ROCR_1.0-11              
##  [15] tzdb_0.4.0                globals_0.16.2           
##  [17] R.utils_2.12.2            timechange_0.2.0         
##  [19] spatstat.sparse_3.0-1     colorspace_2.1-0         
##  [21] ggrepel_0.9.3             xfun_0.39                
##  [23] jsonlite_1.8.5            progressr_0.13.0         
##  [25] spatstat.data_3.0-1       survival_3.5-5           
##  [27] zoo_1.8-12                glue_1.6.2               
##  [29] polyclip_1.10-4           gtable_0.3.3             
##  [31] zlibbioc_1.42.0           XVector_0.36.0           
##  [33] leiden_0.4.3              DelayedArray_0.22.0      
##  [35] future.apply_1.11.0       abind_1.4-5              
##  [37] DBI_1.1.3                 spatstat.random_3.1-5    
##  [39] miniUI_0.1.1.1            Rcpp_1.0.10              
##  [41] viridisLite_0.4.2         xtable_1.8-4             
##  [43] reticulate_1.30           htmlwidgets_1.6.2        
##  [45] httr_1.4.6                RColorBrewer_1.1-3       
##  [47] ellipsis_0.3.2            ica_1.0-3                
##  [49] farver_2.1.1              R.methodsS3_1.8.2        
##  [51] pkgconfig_2.0.3           sass_0.4.6               
##  [53] uwot_0.1.14               deldir_1.0-9             
##  [55] utf8_1.2.3                labeling_0.4.2           
##  [57] tidyselect_1.2.0          rlang_1.1.1              
##  [59] reshape2_1.4.4            later_1.3.1              
##  [61] munsell_0.5.0             tools_4.2.1              
##  [63] cachem_1.0.8              cli_3.6.1                
##  [65] generics_0.1.3            ggridges_0.5.4           
##  [67] evaluate_0.21             fastmap_1.1.1            
##  [69] yaml_2.3.7                goftest_1.2-3            
##  [71] knitr_1.43                fitdistrplus_1.1-11      
##  [73] RANN_2.6.1                sparseMatrixStats_1.8.0  
##  [75] pbapply_1.7-0             future_1.32.0            
##  [77] nlme_3.1-162              mime_0.12                
##  [79] ggrastr_1.0.2             R.oo_1.25.0              
##  [81] compiler_4.2.1            rstudioapi_0.14          
##  [83] beeswarm_0.4.0            plotly_4.10.2            
##  [85] png_0.1-8                 spatstat.utils_3.0-3     
##  [87] glmGamPoi_1.8.0           bslib_0.5.0              
##  [89] stringi_1.7.12            highr_0.10               
##  [91] lattice_0.21-8            vctrs_0.6.2              
##  [93] pillar_1.9.0              lifecycle_1.0.3          
##  [95] spatstat.geom_3.2-1       lmtest_0.9-40            
##  [97] jquerylib_0.1.4           RcppAnnoy_0.0.20         
##  [99] data.table_1.14.8         bitops_1.0-7             
## [101] irlba_2.3.5.1             httpuv_1.6.11            
## [103] patchwork_1.1.2           R6_2.5.1                 
## [105] promises_1.2.0.1          KernSmooth_2.23-21       
## [107] gridExtra_2.3             vipor_0.4.5              
## [109] parallelly_1.36.0         codetools_0.2-19         
## [111] MASS_7.3-60               withr_2.5.0              
## [113] sctransform_0.3.5         GenomeInfoDbData_1.2.8   
## [115] mgcv_1.8-42               parallel_4.2.1           
## [117] hms_1.1.3                 grid_4.2.1               
## [119] DelayedMatrixStats_1.18.2 rmarkdown_2.22           
## [121] Rtsne_0.16                spatstat.explore_3.2-1   
## [123] shiny_1.7.4               ggbeeswarm_0.7.2